Round 1: Top-level cell type annotation

mkdir -p result/step2/bbknn/

[ -f result/step2/matrix.batch.gz ] || \
    gzip -cd result/step1/matrix_final.cols.gz | \
        awk -F'_' '{ print $2 }' | \
        gzip -c > result/step2/matrix.batch.gz

[ -f result/step2/bbknn/total.mtx.gz ] || \
    mmutil_bbknn \
        --mtx result/step1/matrix_final.mtx.gz \
        --col result/step1/matrix_final.cols.gz \
        --batch result/step2/matrix.batch.gz \
        --knn 100 --rank 100 --log_scale \
        --out result/step2/bbknn/total \
        2> result/step2/bbknn/total.log
mkdir -p result/step2/assignment/

[ -f result/step2/assignment/round1.annot.gz ] || \
    mmutil_annotate_col \
        --svd_u result/step2/bbknn/total.svd_U.gz \
        --svd_d result/step2/bbknn/total.svd_D.gz \
        --svd_v result/step2/bbknn/total.factors.gz \
        --row result/step1/matrix_final.rows.gz \
        --col result/step2/bbknn/total.cols.gz \
        --ann marker/surface_round1_positive.txt \
        --anti marker/surface_round1_negative.txt \
        --em_iter 1000 --kappa_max 100 --em_tol 1e-8 \
        --out result/step2/assignment/round1 \
        2> result/step2/assignment/round1.log
if ! [ -f result/step2/protein.mtx.gz ] ; then
    gzip -cd result/step1/matrix_final.rows.gz | \
        awk '/anti_/ || /CCR/ || /CXCR/' | \
        gzip > result/step2/protein.select.gz

    mmutil_select_row \
        result/step1/matrix_final.mtx.gz \
        result/step1/matrix_final.rows.gz \
        result/step1/matrix_final.cols.gz \
        result/step2/protein.select.gz \
        result/step2/protein
fi
prot.raw.mtx <- Matrix::readMM("result/step2/protein.mtx.gz") %>% as.matrix()
rownames(prot.raw.mtx) <- .fread("result/step2/protein.rows.gz") %>% unlist
colnames(prot.raw.mtx) <- .fread("result/step2/protein.cols.gz") %>% unlist

features <- .fread("result/step1/matrix_final.rows.gz") %>% unlist
.idx <- which(str_starts(features, "anti_"))
.rows <- features[.idx]
U <- .fread("result/step2/bbknn/total.svd_U.gz") %>% as.matrix
U <- U[.idx, , drop = FALSE]
D <- .fread("result/step2/bbknn/total.svd_D.gz") %>% as.matrix
V <- .fread("result/step2/bbknn/total.factors.gz") %>% as.matrix

prot.mtx <- sweep(U, 2, D, `*`) %*% t(V)
rownames(prot.mtx) <- .rows
colnames(prot.mtx) <- .fread("result/step2/bbknn/total.cols.gz") %>% unlist
.col <- c("tag", "celltype", "prob", "ln.prob")
annot.dt <- .fread("result/step2/assignment/round1.annot.gz", col.names=.col)
annot.dt[, c("barcode","batch") := tstrsplit(tag, split="_")]
.ct <- c("mTreg","nTreg","mTconv","nTconv")
plt <- plt.scatter.ct.2(.ct, annot.dt, prot.raw.mtx)
print(plt)

.file <- fig.dir %&% "/Fig_round1_cdmarker_raw.pdf"
.gg.save(filename = .file, plot = plt, width=6, height=4)

PDF

.ct <- c("mTreg","nTreg","mTconv","nTconv")
plt <- plt.scatter.ct.2(.ct, annot.dt, pmax(exp(prot.mtx) - 1, 0))
print(plt)

.file <- fig.dir %&% "/Fig_round1_cdmarker_bbknn.pdf"
.gg.save(filename = .file, plot = plt, width=6, height=4)

PDF

UMAP to confirm cell type assignment results

svd.file <- "result/step2/bbknn/total.factors.gz"
out.file <- "result/step2/umap/bbknn_total.gz"

.umap.dt <- read.umap(svd.file, out.file, npc=10)

.col.file <- "result/step1/matrix_final.cols.gz"

plt <- plot.three.umap(.umap.dt, annot.dt, .col.file)

print(plt)

.file <- fig.dir %&% "/Fig_annotation_umap_bbknn.pdf"
.gg.save(filename = .file, plot = plt, width=3.5, height=8)

PDF

Q/C by marker genes

mkdir -p result/step2/marker/

echo "FOXP3 ID3 BACH2 CXCR3 PRDM1 SGK1 TCF7 LEF1 SELL IL2RA IL7R IKZF2 CCR6 CCR4 CCR7 CTLA4 HLA-DRA anti_CD25 anti_CD127 anti_CD183 anti_CD196 anti_CD197 anti_CD194 anti_CD45RA anti_CD45RO anti_HLA" > result/step2/marker/marker.select

if ! [ -f result/step2/marker/matrix.mtx.gz ] ; then
    mmutil_select_row \
        result/step1/matrix_final.mtx.gz \
        result/step1/matrix_final.rows.gz \
        result/step1/matrix_final.cols.gz \
        result/step2/marker/marker.select \
        result/step2/marker/matrix
fi
.markers <- sort(as.character(unique(marker.dt$marker)))
for(g in .markers){
    plt <- plot.marker.umap(g)
    print(plt)
    .file <- fig.dir %&% "/Fig_marker_" %&% g %&% "_umap.pdf"
    .gg.save(filename = .file, plot = plt, width=3.2, height=3)    
}

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

PDF

Basic statistics for the first round annotation (29,179 cells)

.hash.info <-
    readxl::read_xlsx("data/Hashing list MS Treg project.xlsx", 1) %>%
    mutate(hash = str_remove(hash,"#")) %>%
    mutate(hash = as.integer(hash)) %>%
    rename(Sample = `Cell type`)

.dt <-
    annot.dt %>%
    mutate(batch=as.integer(batch)) %>%
    left_join(fread("data/hashtag.tsv.gz",header=T)) %>%
    left_join(.hash.info) %>%
    mutate(ct=factor(celltype, c("nTconv","mTconv","nTreg","mTreg"))) %>%
    filter(batch != 2, !is.na(ct), !is.na(Sample))

Combining all the experiments

PDF

PDF

Total CD4 samples

PDF

PDF

CD25 enriched samples

PDF

PDF